#############################################################################################
#############################################################################################
#########     REPLICATION FOR:                                                ###############
#########     RISE OF GRASSROOTS CIVIL SOCIETY UNDER ONE-PARTY RULE           ###############
#############################################################################################
#############################################################################################

# Authors: 
#   
#   Yu Zeng (Asst. Prof, School of Government, Peking University)
#   Junyan Jiang (Asst. Prof, Dept of Political Science, Columbia University)
#   Jie Li (PhD candidate, Department of East Asian Studies, University of Vienna)
#   Christian Goebel (University Professor of Modern China Studies, Department of East Asian Studies, University of Vienna)

# Version: 1.0
# Tested under:
## R version 3.6.1
## R package "texreg" version: 1.36.23
## R package "stargazer" version: 5.2.2
## Environment: Windows 11 x64, Intel i9-9980HK CPU, 32GB RAM


rm(list=ls())

require(devtools)
install_version("texreg", version = "1.36.23", repos = "http://cran.us.r-project.org")

library(haven)
require(survival)
library(ivtools)
require(dplyr)
library(stargazer)
library(lme4)
library(splines)
library(AER)
library(ivprobit)
library(texreg)

setwd("C:/Users/Ricardo/OneDrive/Documents/Research/ERCResponsiveness/Workspace/submission/")


source("texreg.ivcox.R")
environment(texreg.ivcox) <- asNamespace('texreg')

## Create method for extracting ivcoxph results
extract.ivcoxph <- function(model, summary, N, include.nobs = TRUE)
{
  m <- model
  s <- summary
  s$coefficients <- s$coefficients[1:N,]
  coefficient.names <- rownames(s$coefficients)
  coefficients <- s$coefficients[, 1]
  standard.errors <- s$coefficients[, 2]
  significance <- s$coefficients[, 4]
  n <- length(m$fitY.LX$residuals)
  gof <- numeric()
  gof.names <- character()
  gof.decimal <- logical()
  if (include.nobs == TRUE) {
    gof <- c(gof, n)
    gof.names <- c(gof.names, "Num. obs.")
    gof.decimal <- c(gof.decimal, FALSE)
  }
  tr <- createTexreg(coef.names = coefficient.names, coef = coefficients, 
                     se = standard.errors, pvalues = significance, gof.names = gof.names, 
                     gof = gof, gof.decimal = gof.decimal)
  return(tr)
}
setMethod("extract", signature = className("ivcoxph", "ivmod"), definition = extract.ivcoxph)

## Dataframe
homeowners <- read_dta("replication_data.dta")
homeowners <- homeowners[homeowners$year>2007 & homeowners$year<2018, ]
survdata.post2008 <- data.frame(homeowners)
survdata.post2008 <- survdata.post2008[survdata.post2008$samp==1, ] # Coding pro-HOA policy ("fPolicyHome") as 1
survdata.post2008 <- survdata.post2008[!is.na(survdata.post2008$homeownersCompltPct), ]
data <- survdata.post2008
tzero <- data$timezero
tpolicy <- data$timepolicy
stat <- data$status

## Dependent variable
depvar_t <- "Surv(tzero, tpolicy, stat)"
depvar_l <- "fPolicyHome"

## Endogenous var: homeowners' complaint
indvar <- "homeownersCompltPct"

## IV: land revenue ratio four years before T under previous leader
IV_diff <- "diffleaderXL5loglandrev_rev"
IV4_diff <- "diffleaderXL4loglandrev_rev"
IV3_diff <- "diffleaderXL3loglandrev_rev"

IV2 <- "ivOthersMean"
IV2_alt <- "logUrbanNonHomeOthersMean" 

## Controls
controlvar <- c("loggdppc", "logpropprice", "loglandrev_rev")
controlvarlabel <- c("Log GDP per capita", "Log property price", "Log land revenue ratio")

controlvar_prov <- c(controlvar, "provHomeSum", "provPolicy")
controlvarlabel_prov <- c(controlvarlabel, "Neighboring city regulations", "Provincial regulation")

control_leader <- c("msec_localtime", "mayor_localtime", "msec_tenure", "mayor_tenure", "msec2currentsec", "mayor2currentsec", 
                    "msec_age", "mayor_age", "msec_highestedu", "mayor_highestedu", "msec_sex", "mayor_sex")
control_leader_label <- c("City secretary's local time", "City mayor's local time", "City secretary's tenure", "City mayor's tenure", "City secretary's connection", "City mayor's connection", 
                          "City secretary's age", "City mayor's age","City secretary's education", "City mayor's education", "City secretary's gender (female=1)", "City mayor's gender (female=1)")

controlvar_prov_leader <- c(controlvar_prov, control_leader)
controlvarlabel_prov_leader <- c(controlvarlabel_prov, control_leader_label)



###################################################################
## Table 2. Main Results    

## Model 1: Cox no control
cox.zero.formula <- paste(depvar_t, paste(c(indvar, "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.zero <- coxph(formula=as.formula(cox.zero.formula), data=data, robust = TRUE, method="breslow")
cox.zero.sum <- summary(cox.zero)

## Model 2: Cox baseline 
cox.base.formula <- paste(depvar_t, paste(c(indvar, controlvar_prov, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.base <- coxph(formula=as.formula(cox.base.formula), data=data, robust = TRUE, method="breslow")
cox.base.sum <- summary(cox.base)

## Model 3: Cox with leaders characteristics (2008-2017) 
cox.leader.formula <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.leader <- coxph(formula=as.formula(cox.leader.formula), data=data, robust = TRUE, method="breslow")
cox.leader.sum <- summary(cox.leader)

## Model 4: IV Cox w/ no control
# First stage
fitX.LZ0.formula <- paste(indvar, paste(c(IV_diff, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ0 <- glm(formula = as.formula(fitX.LZ0.formula), data=data)
fitX.LZ0.sum <- summary(fitX.LZ0)
# Second stage
fitIV0 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ0, fitT.LX=cox.zero, data=data, ctrl=TRUE, clusterid="cityid")
fitIV0.sum <- summary(fitIV0)
fitIV0.out <- extract.ivcoxph(fitIV0, fitIV0.sum, 2)

## Model 5: IV Cox
# First stage
fitX.LZ.formula <- paste(indvar, paste(c(IV_diff, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ <- glm(formula = as.formula(fitX.LZ.formula), data=data)
fitX.LZ.sum <- summary(fitX.LZ)
# Second stage
fitIV <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ, fitT.LX=cox.base, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.sum <- summary(fitIV)
fitIV.out <- extract.ivcoxph(fitIV, fitIV.sum, 6)

## Model 6: IV Cox with leaders characteristics
fitX.LZ.control.formula <- paste(indvar, paste(c(IV_diff, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.control <- glm(formula = as.formula(fitX.LZ.control.formula), data=data)
fitX.LZ.control.sum <- summary(fitX.LZ.control)
fitIV.control <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.control, fitT.LX=cox.leader, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.control.sum <- summary(fitIV.control)
# fitIV.control.sum
fitIV.control.out <- extract.ivcoxph(fitIV.control, fitIV.control.sum, 16)

## Output
texreg.ivcox(list(cox.zero, cox.base, cox.leader, fitIV0.out, fitIV.out, fitIV.control.out), 
            stars = c(0.1, 0.01, 0.05), 
            dep.var = "Policy promoting homeowner association",
            custom.model.names= c("Cox", "Cox", "Cox", "IV Cox", "IV Cox", "IV Cox"),
            custom.coef.names = c("% homeowner complaints", controlvarlabel_prov),
            omit.coef = "(msec)|(mayor)|R", 
            add.lines.sep = FALSE, add.lines = list(c("Strata: administrative level", "", "Y", "Y", "", "Y", "Y"), 
                                                   c("Leadership covariates", "", "", "Y", "", "", "Y")))

## Wald test
data_wald <- data[!is.na(data$diffleaderXL5loglandrev_rev), ]
fitX.LZ.control.formula.noIV <- paste(indvar, paste(c(controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ.control.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ.control.formula), data=data_wald)
waldtest(mod2, mod1) 


## Test for exogeneity
data_HausWutest <- data[!is.na(data$diffleaderXL5loglandrev_rev), ]
data_HausWutest <- data_HausWutest[!is.na(data_HausWutest$loglandrev_rev), ]
data_HausWutest <- data_HausWutest[!is.na(data_HausWutest$loggdppc), ]
data_HausWutest <- data_HausWutest[!is.na(data_HausWutest$logpropprice), ]
first_stage <- lm(homeownersCompltPct ~ diffleaderXL5loglandrev_rev + loggdppc + logpropprice + loglandrev_rev + as.factor(cityid) + as.factor(year), data=data_HausWutest)
Hausman_reg <- lm(fPolicyHome ~ homeownersCompltPct + loggdppc + logpropprice + loglandrev_rev + first_stage$residuals + as.factor(cityid) + as.factor(year), data=data_HausWutest)
# print(summary(Hausman_reg))
HausWutest <- waldtest(Hausman_reg,.~.-first_stage$residuals)
print(HausWutest) 
# p-value = .85. Fail to reject the null hypothesis of instrument exogeneity (i.e., error term of the first stage is irrelevant, beta=0).


###################################################################
#### Table 3. Mechanism: Government vs. Business
##
 
indvar_bus <- "home2BusPct"          ## homevbus_pct: against business %
indvar_gov <- "home2GovPct"          ## homevgov_pct: against government %
indvar_govbus <- "home2busgovPct"    ## homevbusgov_pct: against both business and government %
indvar_others <- "homevothersPct"    ## homevothers_pct: against other homeowners %
data$homebusonly <- data$home2BusPct - data$home2busgovPct
data$homegovonly <- data$home2GovPct - data$home2busgovPct
indvar_busonly <- "homebusonly"
indvar_govonly <- "homegovonly"

cox.mech.formula <- paste(depvar_t, paste(c(indvar_busonly, indvar_govonly, indvar_govbus, indvar_others, controlvar_prov, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.mech <- coxph(formula = as.formula(cox.mech.formula), data=data, robust = TRUE, method="breslow")
fitX.mech.formula <- paste(indvar_busonly, paste(c(IV_diff, indvar_govonly, indvar_govbus, indvar_others, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.mech <- glm(formula = as.formula(fitX.mech.formula), data=data)
fitIV.mech <- ivcoxph(estmethod="ts", fitX.LZ=fitX.mech, fitT.LX=cox.mech, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.mech.sum <- summary(fitIV.mech)
fitIV.mech.out <- extract.ivcoxph(fitIV.mech, fitIV.mech.sum, 10)

cox.mech.formula2 <- paste(depvar_t, paste(c(indvar_busonly, indvar_govonly, indvar_govbus, indvar_others, controlvar_prov_leader, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.mech2 <- coxph(formula = as.formula(cox.mech.formula2), data=data, robust = TRUE, method="breslow")
fitX.mech.formula2 <- paste(indvar_busonly, paste(c(IV_diff, indvar_govonly, indvar_govbus, indvar_others, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.mech2 <- glm(formula = as.formula(fitX.mech.formula2), data=data)
fitIV.mech2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.mech2, fitT.LX=cox.mech2, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.mech.sum2 <- summary(fitIV.mech2)
fitIV.mech.out2 <- extract.ivcoxph(fitIV.mech2, fitIV.mech.sum2, 19)

texreg.ivcox(list(cox.mech, cox.mech2, fitIV.mech.out, fitIV.mech.out2),
             stars = c(0.1, 0.05, 0.01), 
             dep.var = "Policy promoting homeowner association",
             custom.model.names= c("(1)", "(2)", "(3)", "(4)"),
             custom.coef.names = c("%  complaints to business only", "%  complaints to government only", "%  complaints to business and government", "% complaints to other homeowners", controlvarlabel_prov),
             omit.coef = "(msec)|(mayor)|R", 
             add.lines.sep = TRUE, add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y"), c("Leadership covariates", "", "Y", "", "Y")))



###################################################################
###########   Appendix                                   ##########
###################################################################

## Table A.1: Summary Statistics

outdf <- homeowners[c("fPolicyHome", "homeownersCompltPct", controlvar_prov, IV_diff, IV4_diff, IV3_diff, IV2, IV2_alt, "logdist", "loghhi_asset", "loggovemployee_pc", "logrev_pop", "urbanrate", "urbanrate_pop", "eduYear", control_leader)]
stargazer(as.data.frame(outdf),
          omit.summary.stat = c("p25", "p75"),
          covariate.labels = c("Policy promoting homeowner association", "% Homeowner complaints", controlvarlabel_prov,
                               "Land revenue dependency (t-5)", "Land revenue dependency (t-4)", "Land revenue dependency (t-3)", 
                               "Neighbors' average land revenue dependency (t-5)", "Neighbors' average urban nonhomeowner complaints",
                               "Log distance to provincial capital", "Log Herfindahl index: real estate (2008)", "Log # of government employees per capita (2008)", 
                               "Log revenue per capita", "% Urban area", "% Urban population", "Average education years (2010)",
                               control_leader_label))


## Table A.5: correlation matrix
cor.matrix <- cor(homeowners[, c("homeownersCompltPct", controlvar_prov)], use = "complete.obs")
rownames(cor.matrix) <- c("% homeowners' complaint", controlvarlabel_prov)
colnames(cor.matrix) <- c("% homeowners' complaint", controlvarlabel_prov)
stargazer(cor.matrix, title="Correlation Matrix", rownames = TRUE, colnames = TRUE)


#################################################################
##########  Robustness  #########################################

## Table A.6: Alternative coding scheme of dependent variable (improved regulation coded as 1)

survdata1 <- data.frame(homeowners)
survdata1 <- survdata1[survdata1$samp_i==1, ]
survdata1 <- survdata1[!is.na(survdata1$homeownersCompltPct), ]
data1 <- survdata1
tzero1 <- data1$timezero_i
tpolicy1 <- data1$timepolicy_i
stat1 <- data1$status_i
depvar_t1 <- "Surv(tzero1, tpolicy1, stat1)"

# repeat Cox no control
cox.zero.formula1 <- paste(depvar_t1, paste(c(indvar, "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.zero1 <- coxph(formula=as.formula(cox.zero.formula1), data=data1, robust = TRUE, method="breslow")
cox.zero.sum1 <- summary(cox.zero1)

# repeat baseline Cox
cox.base.formula1 <- paste(depvar_t1, paste(c(indvar, controlvar_prov, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.base1 <- coxph(formula=as.formula(cox.base.formula1), data=data1, robust = TRUE, method="breslow")
cox.base.sum1 <- summary(cox.base1)

## repeat baseline Cox with leaders & provincial controls 
cox.leader.formula1 <- paste(depvar_t1, paste(c(indvar, controlvar_prov_leader, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.leader1 <- coxph(formula=as.formula(cox.leader.formula1), data=data1, robust = TRUE, method="breslow")
cox.leader.sum1 <- summary(cox.leader1)

## Output
stargazer(cox.zero1, cox.base1, cox.leader1, 
          dep.var.caption = "Policy promoting homeowner association", 
          keep=c(indvar, controlvar_prov),
          covariate.labels = c("% homeowner complaint", controlvarlabel_prov),
          digits = 2, add.lines.sep = TRUE, add.lines = list(c("Strata: administrative level", "", "Y", "Y"), c("Leadership covariates", "", "", "Y")))


## Table A.7: Alternative endogenous var (city level only)

indvar2 <- "homecityPct"

cox.base.formula2 <- paste(depvar_t, paste(c(indvar2, controlvar_prov, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.base2 <- coxph(formula=as.formula(cox.base.formula2), data=data, robust = TRUE, method="breslow")
cox.base.sum2 <- summary(cox.base2)

fitX.LZ.formula2 <- paste(indvar2, paste(c(IV_diff, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ2 <- glm(formula = as.formula(fitX.LZ.formula2), data=data)
fitX.LZ.sum2 <- summary(fitX.LZ2)
fitIV2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ2, fitT.LX=cox.base2, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.sum2 <- summary(fitIV2)
fitIV.out2 <- extract.ivcoxph(fitIV2, fitIV.sum2, 6)

## repeat baseline Cox with leaders & provincial controls 
cox.leader.formula2 <- paste(depvar_t, paste(c(indvar2, controlvar_prov_leader, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.leader2 <- coxph(formula=as.formula(cox.leader.formula2), data=data, robust = TRUE, method="breslow")
cox.leader.sum2 <- summary(cox.leader2)

## repeat IV Cox with leaders controls 
fitX.LZ.control.formula2 <- paste(indvar2, paste(c(IV_diff, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.control2 <- glm(formula = as.formula(fitX.LZ.control.formula2), data=data)
fitX.LZ.control.sum2 <- summary(fitX.LZ.control2)

fitIV.control2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.control2, fitT.LX=cox.leader2, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.control.sum2 <- summary(fitIV.control2)
fitIV.control.out2 <- extract.ivcoxph(fitIV.control2, fitIV.control.sum2, 16)

## Output
texreg.ivcox(list(cox.base2, fitIV.out2, cox.leader2, fitIV.control.out2),
             stars = c(0.1, 0.01, 0.05), 
             dep.var = "Policy promoting homeowner association",
             custom.model.names= c("Cox", "IV Cox", "Cox", "IV Cox"),
             custom.coef.names = c("% homeowner complaint (city level)", controlvarlabel_prov),
             omit.coef = "(msec)|(mayor)", 
             add.lines.sep = TRUE, add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y"), 
                                                    c("Leadership covariates", "", "", "Y", "Y")))


## Table A.8: Alternative Estimation Model

logit.spl1 <- paste(depvar_l, paste(c(indvar, "ns(year,3)"), collapse=" + "), sep=" ~ ")
logit.spl2 <- paste(depvar_l, paste(c(indvar, controlvar_prov, "ns(year,3)"), collapse=" + "), sep=" ~ ")
logit.spl3 <- paste(depvar_l, paste(c(indvar, controlvar_prov_leader, "ns(year,3)"), collapse=" + "), sep=" ~ ")

fit.logit.spl1 <- miceadds::glm.cluster(logit.spl1, family=binomial(link="logit"), cluster="cityid", data=data)
summary(fit.logit.spl1)

fit.logit.spl2 <- miceadds::glm.cluster(logit.spl2, family=binomial(link="logit"), cluster="cityid", data=data)
summary(fit.logit.spl2)

fit.logit.spl3 <- miceadds::glm.cluster(logit.spl3, family=binomial(link="logit"), cluster="cityid", data=data)
summary(fit.logit.spl3)

## Required Texreg Version 1.38.6
# texreg(list(fit.logit.spl1, fit.logit.spl2, fit.logit.spl3),
#        stars = c(0.1, 0.01, 0.05), 
#        dep.var = "Policy promoting homeowners association",
#        custom.model.names= c("(1)", "(2)", "(3)"),
#        custom.coef.names = c("% homeowner complaint", controlvarlabel_prov),
#        omit.coef = "(year)|(Intercept)|(msec)|(mayor)")

#######################################
## Table A.9: Additional control variables

# Correcting NPH

data$logt_logdist = (data$year-2007)*data$logdist 
data$logt_loghhi_asset = (data$year-2007)*data$loghhi_asset
data$logt_loggovemployee_pc = (data$year-2007)*data$loggovemployee_pc

controlvar_prov2 <- c("logpropprice", "loglandrev_rev", "provHomeSum", "provPolicy", "loggdppc")
controlvarlabel_prov2 <- c("Log property price", "Log land revenue ratio", "Neighboring city regulations", "Provincial regulation", "Log GDP per capita")

controlvar_prov_leader2 <- c(controlvar_prov2, control_leader)
controlvarlabel_prov_leader2 <- c(controlvarlabel_prov2, control_leader_label)

cox.robust.formula <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "logt_logdist", "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust <- coxph(formula=as.formula(cox.robust.formula), data=data, robust = TRUE, method="breslow")
cox.robust.sum <- summary(cox.robust)

cox.robust.formula2 <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "logt_loghhi_asset", "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust2 <- coxph(formula=as.formula(cox.robust.formula2), data=data, robust = TRUE, method="breslow")
cox.robust.sum2 <- summary(cox.robust2)

cox.robust.formula3 <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "logt_loggovemployee_pc", "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust3 <- coxph(formula=as.formula(cox.robust.formula3), data=data, robust = TRUE, method="breslow")
cox.robust.sum3 <- summary(cox.robust3)

cox.robust.formula4 <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "logrev_pop","strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust4 <- coxph(formula=as.formula(cox.robust.formula4), data=data, robust = TRUE, method="breslow")
cox.robust.sum4 <- summary(cox.robust4)

cox.robust.formula5 <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "urbanrate", "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust5 <- coxph(formula=as.formula(cox.robust.formula5), data=data, robust = TRUE, method="breslow")
cox.robust.sum5 <- summary(cox.robust5)

cox.robust.formula6 <- paste(depvar_t, paste(c(indvar, controlvar_prov_leader2, "urbanrate_pop", "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust6 <- coxph(formula=as.formula(cox.robust.formula6), data=data, robust = TRUE, method="breslow")
cox.robust.sum6 <- summary(cox.robust6)

# Column 7
data$EduYear = (data$year-2009)*data$eduYear
data_educontrol <- data[data$year>=2010, ]
tzero_edu <- data_educontrol$timezero
tpolicy_edu <- data_educontrol$timepolicy
stat_edu <- data_educontrol$status
depvar_t_edu <- "Surv(tzero_edu, tpolicy_edu, stat_edu)"

controlvar_edu <- c("logpropprice", "loglandrev_rev", "provHomeSum", "provPolicy", "EduYear")
controlvarlabel_edu <- c("Log property price", "Log land revenue ratio", "Neighboring city regulations", "Provincial regulation", "Avg Education Years")

controlvar_edu_prov_leader <- c(controlvar_edu, control_leader)
controlvarlabel_edu_prov_leader <- c(controlvarlabel_prov, control_leader_label)

cox.robust.edu.formula <- paste(depvar_t_edu, paste(c(indvar, controlvar_edu_prov_leader, "strata(vpcity)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
cox.robust.edu <- coxph(formula=as.formula(cox.robust.edu.formula), data=data_educontrol, robust = TRUE, method="breslow")
cox.robust.edu.sum <- summary(cox.robust.edu)

stargazer(cox.robust.edu, cox.robust, cox.robust2, cox.robust3, cox.robust4, cox.robust5, cox.robust6, 
          dep.var.caption = "Policy promoting homeowner association", 
          keep=c(indvar, "logpropprice", "loglandrev_rev", "provHomeSum", "provPolicy", "EduYear", "loggdppc", "logdist", "loghhi_asset", "loggovemployee_pc", "logrev_pop", "urbanrate_pop", "urbanrate"),
          covariate.labels = c("% Homeowner complaints", "Log property price", "Log land revenue ratio", "Neighboring city regulations", "Provincial regulation", 
                               "Avg education years", "Log GDP per capita", 
                               "Distance to prov capital", "Herfindahl index: real estate (2008)", "Log # of government employees per capita (2008)", 
                               "Log revenue per capita", "% urban area", "% urban population"),
          digits = 2, add.lines.sep = TRUE, no.space = TRUE, add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), c("Leadership covariates", "Y", "Y", "Y", "Y", "Y", "Y", "Y")))


## Table A.10: IV Cox with additional controls

fitX.LZ.robust.edu <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_edu_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data_educontrol)
fitX.LZ.robust.edu.sum <- summary(fitX.LZ.robust.edu)
fitIV.robust.edu <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust.edu, fitT.LX=cox.robust.edu, data=data_educontrol, ctrl=TRUE, clusterid="cityid")
fitIV.robust.edu.sum <- summary(fitIV.robust.edu)
fitIV.robust.edu.out <- extract.ivcoxph(fitIV.robust.edu, fitIV.robust.edu.sum, 19)

fitX.LZ.robust <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "logt_logdist", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust.sum <- summary(fitX.LZ.robust)
fitIV.robust <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust, fitT.LX=cox.robust, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust.sum <- summary(fitIV.robust)
fitIV.robust.out <- extract.ivcoxph(fitIV.robust, fitIV.robust.sum, 20)

fitX.LZ.robust2 <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "logt_loghhi_asset", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust2.sum <- summary(fitX.LZ.robust2)
fitIV.robust2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust2, fitT.LX=cox.robust2, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust2.sum <- summary(fitIV.robust2)
fitIV.robust2.out <- extract.ivcoxph(fitIV.robust2, fitIV.robust2.sum, 20)

fitX.LZ.robust3 <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "logt_loggovemployee_pc", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust3.sum <- summary(fitX.LZ.robust3)
fitIV.robust3 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust3, fitT.LX=cox.robust3, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust3.sum <- summary(fitIV.robust3)
fitIV.robust3.out <- extract.ivcoxph(fitIV.robust3, fitIV.robust3.sum, 20)

fitX.LZ.robust4 <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "logrev_pop", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust4.sum <- summary(fitX.LZ.robust4)
fitIV.robust4 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust4, fitT.LX=cox.robust4, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust4.sum <- summary(fitIV.robust4)
fitIV.robust4.out <- extract.ivcoxph(fitIV.robust4, fitIV.robust4.sum, 20)

fitX.LZ.robust5 <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "urbanrate", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust5.sum <- summary(fitX.LZ.robust5)
fitIV.robust5 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust5, fitT.LX=cox.robust5, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust5.sum <- summary(fitIV.robust5)
fitIV.robust5.out <- extract.ivcoxph(fitIV.robust5, fitIV.robust5.sum, 20)

fitX.LZ.robust6 <- glm(formula = as.formula(paste(indvar, paste(c(IV_diff, controlvar_prov_leader2, "urbanrate_pop", "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")), data=data)
fitX.LZ.robust6.sum <- summary(fitX.LZ.robust6)
fitIV.robust6 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.robust6, fitT.LX=cox.robust6, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.robust6.sum <- summary(fitIV.robust6)
fitIV.robust6.out <- extract.ivcoxph(fitIV.robust6, fitIV.robust6.sum, 20)

texreg.ivcox(list(fitIV.robust.out, fitIV.robust2.out, fitIV.robust3.out, fitIV.robust4.out, fitIV.robust5.out, fitIV.robust6.out, fitIV.robust.edu.out), 
             stars = c(0.1, 0.01, 0.05), 
             dep.var = "Policy promoting homeowner association",
             custom.model.names= c("IV Cox", "IV Cox", "IV Cox", "IV Cox", "IV Cox", "IV Cox", "IV Cox"),
             custom.coef.names = c("% Homeowner complaints", "Log property price", "Log land revenue ratio", "Neighboring city regulations", "Provincial regulation", 
                                   "Log GDP per capita", 
                                   "Distance to prov capital", "Herfindahl index: real estate (2008)", "Log # of government employees per capita (2008)", 
                                   "Log revenue per capita", "% urban area", "% urban population", "Avg education years"),
             omit.coef = "(msec)|(mayor)|R", 
             add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), c("Leadership covariates", "Y", "Y", "Y", "Y", "Y", "Y", "Y")))


############################################################################
## Appendix D.3 Additional IV Tests

## Table A.11: Different lagged IVs
## Land revenue ratio t-3 under different leader
fitX.LZ3.formula <- paste(indvar, paste(c(IV3_diff, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ3 <- glm(formula=as.formula(fitX.LZ3.formula), data=data)
# summary(fitX.LZ3)
fitIV3 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ3, fitT.LX=cox.base, data=data, ctrl=TRUE, clusterid="cityid")
fitIV3.sum <- summary(fitIV3)
# fitIV3.sum
fitIV3.out <- extract.ivcoxph(fitIV3, fitIV3.sum, 6)

fitX.LZ3.control.formula <- paste(indvar, paste(c(IV3_diff, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ3.control <- glm(formula=as.formula(fitX.LZ3.control.formula), data=data)
# summary(fitX.LZ3.control)
fitIV3.control <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ3.control, fitT.LX=cox.leader, data=data, ctrl=TRUE, clusterid="cityid")
fitIV3.control.sum <- summary(fitIV3.control)
# fitIV3.control.sum
fitIV3.control.out <- extract.ivcoxph(fitIV3.control, fitIV3.control.sum, 16)

## Land revenue ratio t-4 under different leader
fitX.LZ4.formula <- paste(indvar, paste(c(IV4_diff, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ4 <- glm(formula=as.formula(fitX.LZ4.formula), data=data)
# summary(fitX.LZ4)
fitIV4 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ4, fitT.LX=cox.base, data=data, ctrl=TRUE, clusterid="cityid")
fitIV4.sum <- summary(fitIV4)
# fitIV4.sum
fitIV4.out <- extract.ivcoxph(fitIV4, fitIV4.sum, 6)

fitX.LZ4.control.formula <- paste(indvar, paste(c(IV4_diff, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ4.control <- glm(formula=as.formula(fitX.LZ4.control.formula), data=data)
# summary(fitX.LZ4.control)
fitIV4.control <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ4.control, fitT.LX=cox.leader, data=data, ctrl=TRUE, clusterid="cityid")
fitIV4.control.sum <- summary(fitIV4.control)
# fitIV4.control.sum
fitIV4.control.out <- extract.ivcoxph(fitIV4.control, fitIV4.control.sum, 16)

data_wald <- data[!is.na(data$diffleaderXL4loglandrev_rev), ]
fitX.LZ4.control.formula.noIV <- paste(indvar, paste(c(controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ4.control.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ4.control.formula), data=data_wald)
waldtest(mod2, mod1)

texreg.ivcox(list(fitIV3.out, fitIV3.control.out, fitIV4.out, fitIV4.control.out),
             stars = c(0.1, 0.01, 0.05), 
             dep.var = "Policy promoting homeowner association",
             custom.model.names= c("Cox", "IV Cox", "Cox", "IV Cox"),
             custom.coef.names = c("% Homeowner complaints", controlvarlabel_prov),
             omit.coef = "(msec)|(mayor)", 
             add.lines.sep = TRUE, add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y"), c("Leadership covariates", "", "", "Y", "Y")))


## Table A.12: Alternative IV estimation

## 2sls+spline
fiv_2sls.spl <- paste(depvar_l, paste(paste(c(indvar, controlvar_prov, "ns(year,3)"), collapse=" + "), " | ",
                                      paste(c(controlvar_prov, IV_diff, "ns(year,3)", "cluster(cityid)"), collapse=" + ")), sep=" ~ ") 
fitiv.2sls.spl <- ivreg(formula = fiv_2sls.spl, data=data)
# summary(fitiv.2sls.spl, vcov = sandwich, diagnostics = TRUE)

## 2sls + overid
fiv_2sls.overid.spl <- paste(depvar_l, paste(paste(c(indvar, controlvar_prov, "ns(year,3)"), collapse=" + "), " | ",
                                             paste(c(controlvar_prov, IV3_diff, IV4_diff, IV_diff, "ns(year,3)", "cluster(cityid)"), collapse=" + ")), sep=" ~ ") 
fitiv.2sls.overid.spl <- ivreg(formula = fiv_2sls.overid.spl, data=data)
# summary(fitiv.2sls.overid.spl, vcov = sandwich, diagnostics = TRUE)

## IV probit 
fiv_probit <- paste(depvar_l, paste(paste(c(controlvar_prov, "ns(year,3)"), collapse=" + "), " | ", indvar, " | ",
                                    paste(c(controlvar_prov, IV3_diff, IV4_diff, IV_diff, "ns(year,3)", "cluster(cityid)"), collapse=" + ")), sep=" ~ ")
fitiv.probit <- ivprobit(formula = as.formula(fiv_probit), data=data)
# summary(fitiv.probit, diagnostics=TRUE)

extract.ivprobit <- function(model, N, include.nobs = TRUE)
{
  m <- model
  N <- N+1
  ## 1: intercept
  coefficients <- m$coefficients[2:N,]
  standard.errors <- m$se[2:N]
  significance <- m$pval[2:N,]
  coefficient.names <- m$names[2:N]
  n <- nrow(m$mr1)
  gof <- numeric()
  gof.names <- character()
  gof.decimal <- logical()
  if (include.nobs == TRUE) {
    gof <- c(gof, n)
    gof.names <- c(gof.names, "Num. obs.")
    gof.decimal <- c(gof.decimal, FALSE)
  }
  tr <- createTexreg(coef.names = coefficient.names, coef = coefficients, 
                     se = standard.errors, pvalues = significance, gof.names = gof.names, 
                     gof = gof, gof.decimal = gof.decimal)
  return(tr)
}
fitiv.probit.out <- extract.ivprobit(fitiv.probit, 9)

texreg(list(fitiv.2sls.spl, fitiv.2sls.overid.spl, fitiv.probit.out),
       stars = c(0.1, 0.01, 0.05), 
       dep.var = "Policy promoting homeowners association",
       custom.model.names= c("2SLS+spline", "2SLS+spline", "IV Probit"),
       custom.coef.names = c("% homeowner complaint", controlvarlabel_prov),
       omit.coef = "(year)|(Intercept)")


## Test for overidentifying restrictions
data.overid <- data[!is.na(data$diffleaderXL5loglandrev_rev), ]
data.overid <- data.overid[!is.na(data.overid$diffleaderXL4loglandrev_rev), ]
data.overid <- data.overid[!is.na(data.overid$diffleaderXL3loglandrev_rev), ]
data.overid <- data.overid[!is.na(data.overid$loglandrev_rev), ]
data.overid <- data.overid[!is.na(data.overid$loggdppc), ]
data.overid <- data.overid[!is.na(data.overid$logpropprice), ]

overid_formula <- paste("fitiv.2sls.overid.spl$residuals", paste(c(IV_diff, IV4_diff, IV3_diff, controlvar_prov, "ns(year,3)", "cluster(cityid)"), collapse=" + "), sep=" ~ ")
overid <- lm(formula=as.formula(overid_formula), data=data.overid)
Sargan_reg <- summary(overid)
Sargan_test <- Sargan_reg$r.squared*nrow(data.overid)
print(Sargan_test)
print(1-pchisq(Sargan_test,3)) 
# p-value = .55. Fail to reject the null hypothesis of instrument validity (i.e., uncorrelated to the regression error terms). 



## Table A.14: Alternative IV

## Model 1: Alt IV Cox with IV2
# First stage
fitX.LZ.altiv.formula <- paste(indvar, paste(c(IV2, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.altiv <- glm(formula = as.formula(fitX.LZ.altiv.formula), data=data)
fitX.LZ.altiv.sum <- summary(fitX.LZ.altiv)
# Second stage
fitIV.altiv <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.altiv, fitT.LX=cox.base, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.altiv.sum <- summary(fitIV.altiv)
fitIV.altiv.out <- extract.ivcoxph(fitIV.altiv, fitIV.altiv.sum, 7)

## Model 2: Alt IV Cox with IV2 (with leaders characteristics)
fitX.LZ.control.formula <- paste(indvar, paste(c(IV2, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.altiv.control <- glm(formula = as.formula(fitX.LZ.control.formula), data=data)
fitX.LZ.altiv.control.sum <- summary(fitX.LZ.altiv.control)
fitIV.altiv.control <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.altiv.control, fitT.LX=cox.leader, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.altiv.control.sum <- summary(fitIV.altiv.control)
fitIV.altiv.control.out <- extract.ivcoxph(fitIV.altiv.control, fitIV.altiv.control.sum, 19)

## Diagnostic
data_wald <- data[!is.na(data$ivOthersMean), ]

fitX.LZ.altiv.formula.noIV <- paste(indvar, paste(c(controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ.altiv.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ.altiv.formula), data=data_wald)
waldtest(mod2, mod1) 

fitX.LZ.control.formula.noIV <- paste(indvar, paste(c(controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ.control.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ.control.formula), data=data_wald)
waldtest(mod2, mod1) 

## Model 3:Alt IV Cox with IV2_alt
# First stage
fitX.LZ.altiv.formula2 <- paste(indvar, paste(c(IV2_alt, controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.altiv2 <- glm(formula = as.formula(fitX.LZ.altiv.formula2), data=data)
fitX.LZ.altiv.sum2 <- summary(fitX.LZ.altiv2)
# Second stage
fitIV.altiv2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.altiv2, fitT.LX=cox.base, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.altiv.sum2 <- summary(fitIV.altiv2)
fitIV.altiv.out2 <- extract.ivcoxph(fitIV.altiv2, fitIV.altiv.sum2, 7)

## Model 4: Alt IV Cox with IV2_alt (with leaders characteristics)
fitX.LZ.control.formula2 <- paste(indvar, paste(c(IV2_alt, controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
fitX.LZ.altiv.control2 <- glm(formula = as.formula(fitX.LZ.control.formula2), data=data)
fitX.LZ.altiv.control.sum2 <- summary(fitX.LZ.altiv.control2)
fitIV.altiv.control2 <- ivcoxph(estmethod="ts", fitX.LZ=fitX.LZ.altiv.control2, fitT.LX=cox.leader, data=data, ctrl=TRUE, clusterid="cityid")
fitIV.altiv.control.sum2 <- summary(fitIV.altiv.control2)
fitIV.altiv.control.out2 <- extract.ivcoxph(fitIV.altiv.control2, fitIV.altiv.control.sum2, 19)

## Diagnostic
data_wald <- data[!is.na(data$logUrbanNonHomeOthersMean), ]

fitX.LZ.altiv.formula.noIV <- paste(indvar, paste(c(controlvar_prov, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ.altiv.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ.altiv.formula2), data=data_wald)
waldtest(mod2, mod1) 

fitX.LZ.control.formula.noIV <- paste(indvar, paste(c(controlvar_prov_leader, "as.factor(cityid)", "as.factor(year)"), collapse=" + "), sep=" ~ ")
mod1 <- lm(formula = as.formula(fitX.LZ.control.formula.noIV), data=data_wald)
mod2 <- lm(formula = as.formula(fitX.LZ.control.formula2), data=data_wald)
waldtest(mod2, mod1) 

## Output
texreg.ivcox(list(fitIV.altiv.out, fitIV.altiv.control.out, fitIV.altiv.out2, fitIV.altiv.control.out2), 
             stars = c(0.1, 0.01, 0.05), dep.var = "Policy promoting homeowner association",
             custom.model.names= c("IV Cox", "IV Cox", "IV Cox", "IV Cox"),
             custom.coef.names = c("% Homeowner complaints", controlvarlabel_prov),
             omit.coef = "(msec)|(mayor)|R", 
             add.lines.sep = TRUE, add.lines = list(c("Strata: administrative level", "Y", "Y", "Y", "Y"), 
                                                    c("Leadership covariates", "", "Y", "", "Y")))

